* this do-file constructs the .dta file necessary for calculating the Hirsch index, by period

* ---------------------------------------------------------------------------
* Session setup: turn off output paging and switch to the data directory.
* ---------------------------------------------------------------------------
set more off
cd d:\data

* ---------------------------------------------------------------------------
* Build a numeric country identifier (country_i) and save the coded file.
* ---------------------------------------------------------------------------
use pat80_11_16c.dta, clear

* encode assigns integer codes to the distinct strings of country in
* alphabetical order.  The recode below maps those codes onto the numbering
* used by the value label ccode.
* NOTE(review): the mapping assumes country holds exactly the 16 codes listed
* in ccode (AU CA CH CN DE EU FR GB IL IN JP KR RU RW TW US) -- confirm.
encode country, generate(country_i)

* Rules are listed by target code; every source value appears exactly once.
recode country_i ///
    (16 = 1)  (11 = 2)  (5 = 3)   (6 = 4)   ///
    (7 = 5)   (8 = 6)   (15 = 7)  (12 = 8)  ///
    (2 = 9)   (3 = 10)  (1 = 11)  (9 = 12)  ///
    (4 = 13)  (10 = 14) (13 = 15) (14 = 16)

* Attach the labels matching the new numbering (replaces the labels that
* encode attached to country_i).
label define ccode ///
    1 "US"  2 "JP"  3 "DE"  4 "EU"  5 "FR"  6 "GB"  7 "TW"  8 "KR" ///
    9 "CA" 10 "CH" 11 "AU" 12 "IL" 13 "CN" 14 "IN" 15 "RU" 16 "RW", replace
label values country_i ccode

save pat80_11_ccode, replace

* ---------------------------------------------------------------------------
* Attach citations to the patents, then compute the Hirsch index h for every
* country x period (p) cell, one country at a time.
*
* Inputs : pat80_11_ccode.dta (patents, with country_i coded 1..16)
*          cite80_11.dta      (citations, keyed by the variable cited)
* Outputs: temp.dta           (patents merged with their citation rows)
*          h_1.dta ... h_16.dta (one row per country x p with variable h)
*
* Changes relative to the earlier version of this section:
*   - use takes the option clear (replace is not an option of use);
*   - patents that receive no citation now get tcit = 0.  Previously the
*     within-patent row counter was never missing, so such patents were
*     counted as having one citation;
*   - merge type written in its documented form 1:m;
*   - the unused variable total is no longer created.
* ---------------------------------------------------------------------------
use pat80_11_ccode, clear

keep patent gyear country* subcat cat p

* The merge key must carry the name used in the citation file.
gen cited = patent
sort cited

* One patent may match many citation rows.
* NOTE(review): assumes cite80_11.dta has one row per citation received,
* identified by cited -- confirm against that file.
merge 1:m cited using cite80_11.dta
tab _merge

* Drop citations to patents that are not in the patent file.
drop if _merge==2

* _merge is kept in temp on purpose: _merge==1 marks a patent without any
* citation, _merge==3 marks one citation row.  It is needed below to count
* citations correctly.
save temp, replace


forvalues i = 1/16 {

    use temp, clear

    keep if country_i==`i'

    * Citations received per patent: number of matched rows (0 if none).
    bysort cited: egen tcit = total(_merge==3)

    * Collapse to one row per patent.
    by cited: keep if _n==1
    drop cited _merge

    * Rank patents within country x period from most to least cited.
    gen neg_tcit = -tcit
    bysort country p (neg_tcit): gen rank = _n
    drop neg_tcit

    * h = largest rank r such that the r-th most cited patent has at least
    * r citations; 0 when no patent in the cell is cited.
    by country p: egen h = max(rank * (tcit >= rank))

    * Keep a single row per country x period.
    keep if rank==1
    keep country p h

    save h_`i', replace
}

* ---------------------------------------------------------------------------
* Stack the per-country results h_1 ... h_16 into one long file, save it as
* h_index, then reshape to one row per country with one h column per period.
* ---------------------------------------------------------------------------

* Start from the first country's file and add the remaining fifteen.
use h_1, clear

forvalues k = 2/16 {
    append using h_`k'
}

* Long layout: one row per country x period.
save h_index, replace

* Wide layout: one row per country, columns h<p>.  No save follows here, so
* the wide layout stays in memory only.
reshape wide h, i(country) j(p)
